import math
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from gym.envs.registration import register

class StochasticContinuousMountainCarEnv(gym.Env):
    """Continuous MountainCar environment with optional Gaussian noise.

    The state is ``[position, velocity]`` and the action is a single engine
    command in ``[-1, 1]``. Three independent noise sources can be enabled:

    Args:
        action_noise_scale: Standard deviation of the Gaussian noise added to
            the commanded action before it is clipped to ``[-1, 1]``.
        dynamics_noise_scale: Standard deviation of the Gaussian noise added
            to the velocity update (only applied when > 0).
        obs_noise_scale: Standard deviation of the Gaussian noise added to the
            returned observations (only applied when > 0). The internal state
            itself is never perturbed by observation noise.
    """

    metadata = {
        "render.modes": ["human", "rgb_array"],
        "video.frames_per_second": 30
    }

    def __init__(
        self,
        action_noise_scale=0.0,
        dynamics_noise_scale=0.0,
        obs_noise_scale=0.0
    ):
        super().__init__()
        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = 0.45

        # Scale applied to the clipped action, and to the slope term
        # cos(3 * position), in the velocity update of step().
        self.force = 0.001
        self.gravity = 0.0025

        self.low_state = np.array([self.min_position, -self.max_speed], dtype=np.float32)
        self.high_state = np.array([self.max_position, self.max_speed], dtype=np.float32)

        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(1,), dtype=np.float32
        )
        self.observation_space = spaces.Box(
            self.low_state, self.high_state, dtype=np.float32
        )

        # Noise parameters
        self.action_noise_scale = action_noise_scale
        self.dynamics_noise_scale = dynamics_noise_scale
        self.obs_noise_scale = obs_noise_scale

        # The viewer is created lazily by render(); define the attribute
        # before seeding/resetting so it always exists on the instance.
        self.viewer = None

        self.seed()
        self.reset()

    def seed(self, seed=None):
        """Seed the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _noisy_observation(self):
        """Return a float32 copy of the state, with observation noise if enabled."""
        obs = np.array(self.state, dtype=np.float32)
        if self.obs_noise_scale > 0.0:
            obs += self.np_random.normal(
                0.0, self.obs_noise_scale, size=obs.shape
            ).astype(np.float32)
        return obs

    def step(self, action):
        """Advance the simulation by one step.

        Args:
            action: The engine command. Normally an indexable of length 1
                (e.g. an array of shape ``(1,)``); a bare scalar or 0-d array
                is accepted as well.

        Returns:
            A tuple ``(obs, reward, done, info)`` where ``obs`` is the
            (possibly noisy) observation, ``reward`` is a Python float,
            ``done`` is True once the position reaches ``goal_position`` and
            ``info`` is an empty dict.
        """
        # 1) Read the commanded action. Indexing is tried first so every input
        #    that worked before behaves identically; scalars fall back to a
        #    direct float conversion instead of raising.
        try:
            raw_action = float(action[0])
        except (TypeError, IndexError):
            raw_action = float(action)

        # The action-noise draw is made on every step regardless of the scale.
        noisy_action = raw_action + self.np_random.normal(0.0, self.action_noise_scale)
        # Clip to [-1, 1]
        force = np.clip(noisy_action, -1.0, 1.0)

        position, velocity = self.state

        # 2) Update velocity + add dynamics noise if requested
        velocity += force * self.force - math.cos(3 * position) * self.gravity
        if self.dynamics_noise_scale > 0.0:
            velocity += self.np_random.normal(0.0, self.dynamics_noise_scale)

        velocity = np.clip(velocity, -self.max_speed, self.max_speed)
        position += velocity
        position = np.clip(position, self.min_position, self.max_position)

        # If the car hits the left boundary while moving left, stop it.
        if position <= self.min_position and velocity < 0:
            velocity = 0

        done = bool(position >= self.goal_position)

        # Reward: 100 on reaching the goal, minus a quadratic penalty on the
        # applied force (i.e. the noisy, clipped action, not the raw command).
        reward = 0.0
        if done:
            reward = 100.0
        reward -= (force ** 2) * 0.001

        self.state = np.array([position, velocity], dtype=np.float32)

        # 3) Observation with possible noise. The reward is converted to a
        #    plain float so callers do not receive a NumPy scalar.
        return self._noisy_observation(), float(reward), done, {}

    def reset(self):
        """Reset to a random start position in [-0.6, -0.4) with zero velocity.

        Returns:
            The initial observation, with observation noise if enabled.
        """
        self.state = np.array([
            self.np_random.uniform(low=-0.6, high=-0.4),
            0.0
        ], dtype=np.float32)

        return self._noisy_observation()

    def render(self, mode="human"):
        """Draw the track and the car using gym's classic-control viewer.

        Args:
            mode: ``"rgb_array"`` asks the viewer to return the frame as an
                array; any other value renders without requesting an array.

        Returns:
            Whatever ``viewer.render`` returns for the requested mode.
        """
        screen_width = 600
        screen_height = 400

        world_width = self.max_position - self.min_position
        scale = screen_width / world_width
        car_width = 40
        car_height = 20

        if self.viewer is None:
            # Imported lazily so the environment can be used without a
            # rendering backend as long as render() is never called.
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)

            # Track
            xs = np.linspace(self.min_position, self.max_position, 100)
            ys = np.array([self._height(x) for x in xs])
            xys = list(zip(
                (xs - self.min_position) * scale,
                ys * scale + 100
            ))
            self.track = rendering.make_polyline(xys)
            self.track.set_linewidth(2)
            self.viewer.add_geom(self.track)

            # Car body; wheels share the same transform so they move with it.
            self.car_transform = rendering.Transform()
            l, r, t, b = -car_width/2, car_width/2, car_height, 0
            car = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            car.set_color(0.0, 0.0, 1.0)
            car.add_attr(self.car_transform)
            self.viewer.add_geom(car)

            front_wheel = rendering.make_circle(car_height / 2.5)
            front_wheel.set_color(0.5, 0.5, 0.5)
            front_wheel.add_attr(rendering.Transform(translation=(car_width / 4, 0)))
            front_wheel.add_attr(self.car_transform)
            self.viewer.add_geom(front_wheel)

            back_wheel = rendering.make_circle(car_height / 2.5)
            back_wheel.set_color(0.5, 0.5, 0.5)
            back_wheel.add_attr(rendering.Transform(translation=(-car_width / 4, 0)))
            back_wheel.add_attr(self.car_transform)
            self.viewer.add_geom(back_wheel)

        # Place the car on the track at the true (noise-free) position and
        # tilt it along the local slope, estimated by a central difference.
        pos = self.state[0]
        carx = (pos - self.min_position) * scale
        self.car_transform.set_translation(carx, self._height(pos)*scale + 100)
        self.car_transform.set_rotation(
            math.atan2(
                self._height(pos + 0.01) - self._height(pos - 0.01),
                0.02
            )
        )

        return self.viewer.render(return_rgb_array=(mode == "rgb_array"))

    def _height(self, x):
        """Return the track height at horizontal position ``x``."""
        return np.sin(3 * x) * 0.45 + 0.55

    def close(self):
        """Close the viewer, if one was created, and forget it."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None

# ---------------------------------------------------------------
# Gym registration: add the stochastic variant to gym's registry
# under its own ID, "StochasticMountainCarContinuous-v0".
# NOTE(review): the entry point names the module
# `continuous_mountain_car` -- confirm this file is importable
# under that name.
# ---------------------------------------------------------------
register(
    "StochasticMountainCarContinuous-v0",
    reward_threshold=90.0,
    max_episode_steps=999,
    entry_point="continuous_mountain_car:StochasticContinuousMountainCarEnv",
)
